{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "create cluster"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "aws emr create-cluster --applications Name=Ganglia Name=Hadoop Name=Hive Name=Hue Name=Mahout Name=Pig Name=Tez --ec2-attributes '{\"KeyName\":\"aws-first-key\",\"InstanceProfile\":\"EMR_EC2_DefaultRole\",\"SubnetId\":\"subnet-a0a966e8\",\"EmrManagedSlaveSecurityGroup\":\"sg-0bf49ee8aeac4e1e8\",\"EmrManagedMasterSecurityGroup\":\"sg-04a922c1e44be5d9d\"}' --service-role EMR_DefaultRole --enable-debugging --release-label emr-5.29.0 --log-uri 's3n://aws-logs-622309853439-ap-northeast-1/elasticmapreduce/' --name 'My First EMR Cluster' --instance-groups '[{\"InstanceCount\":1,\"EbsConfiguration\":{\"EbsBlockDeviceConfigs\":[{\"VolumeSpecification\":{\"SizeInGB\":32,\"VolumeType\":\"gp2\"},\"VolumesPerInstance\":2}]},\"InstanceGroupType\":\"CORE\",\"InstanceType\":\"m5.xlarge\",\"Name\":\"Core Instance Group\"},{\"InstanceCount\":1,\"EbsConfiguration\":{\"EbsBlockDeviceConfigs\":[{\"VolumeSpecification\":{\"SizeInGB\":32,\"VolumeType\":\"gp2\"},\"VolumesPerInstance\":2}]},\"InstanceGroupType\":\"MASTER\",\"InstanceType\":\"m5.xlarge\",\"Name\":\"Master Instance Group\"}]' --scale-down-behavior TERMINATE_AT_TASK_COMPLETION --region ap-northeast-1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'urllib' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-1-edde01e1b790>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0murllib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0murlretrieve\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"https://s3.amazonaws.com/nyc-tlc/misc/taxi_zones.zip\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"taxi_zones.zip\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      2\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mzipfile\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mZipFile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"taxi_zones.zip\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m\"r\"\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mzip_ref\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m     \u001b[0mzip_ref\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mextractall\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"./shape\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mNameError\u001b[0m: name 'urllib' is not defined"
     ]
    }
   ],
   "source": [
    "urllib.request.urlretrieve(\"https://s3.amazonaws.com/nyc-tlc/misc/taxi_zones.zip\", \"taxi_zones.zip\")\n",
    "with zipfile.ZipFile(\"taxi_zones.zip\",\"r\") as zip_ref:\n",
    "    zip_ref.extractall(\"./shape\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
